IPSES project
Introduction
To illustrate the usage of CytoTree on differential trajectory reconstruction of time-course FCS data, we used a flow cytometry dataset of ten-day hematopoietic differentiation from the hESC line HUES9. By adding different cytokine combinations on different days, HUES9 cells (CD90+CD49f+ on Day 0, D0) were directionally differentiated into mesodermal cells (FLK1+, D4), hemogenic endothelium (CD34+CD31+CD43-, D6) and hematopoietic stem/progenitor cells (HSPCs, CD34+CD43+CD38-CD45RA-CD90+, D8) in succession. Ten cell surface markers (CD90, CD49f, FLK1, CD34, CD31, CD73, CD43, CD45, CD45RA, and CD38) were used for the flow cytometry analysis to monitor the generation of these cells.
Preprocessing
# Loading packages
suppressMessages({
library(ggplot2)
library(LSD)
library(flowCore)
library(flowViz)
library(CytoTree)
library(stringr)
})
##############################
fcs.path <- "FCS/time_course/"
fcs.files <- paste0(fcs.path, "D", c(0,2,4,6,8,10), ".fcs")
set.seed(1)
fcs.data <- runExprsMerge(fcs.files, comp = F, transformMethod = "none", fixedNum = 2500)
# Refine colnames of fcs data
# for usecase 2
recol <- c(`FITC-A<CD43>` = "CD43", `APC-A<CD34>` = "CD34", `BV421-A<CD90>` = "CD90",
`BV510-A<CD45RA>` = "CD45RA", `BV605-A<CD31>` = "CD31", `BV650-A<CD49f>` = "CD49f",
`BV 735-A<CD73>` = "CD73", `BV786-A<CD45>` = "CD45", `PE-A<FLK1>` = "FLK1",
`PE-Cy7-A<CD38>` = "CD38")
colnames(fcs.data)[match(names(recol), colnames(fcs.data))] = recol
fcs.data <- fcs.data[, recol]
# Build an cyt object
# If you don't want to see the running log information, set verbose FALSE
day.list <- c("D0", "D2", "D4", "D6", "D8", "D10")
meta.data <- data.frame(cell = rownames(fcs.data),
stage = str_replace(rownames(fcs.data), regex("_.+"), "") )
meta.data$stage <- factor(as.character(meta.data$stage), levels = day.list)
meta.data$batch <- as.numeric(meta.data$stage)
markers <- c("CD43", "CD34", "CD90", "CD45RA", "CD31", "CD73", "CD45", "FLK1", "CD38", "CD49f")
cyt <- createCYT(raw.data = fcs.data, markers = markers,
meta.data = meta.data,
normalization.method = "log",
verbose = T)## 2020-06-20 01:09:03 Number of cells in processing: 15000
## 2020-06-20 01:09:03 rownames of meta.data and raw.data will be named using column cell
## 2020-06-20 01:09:03 Index of markers in processing
## 2020-06-20 01:09:03 Creating CYT object.
## 2020-06-20 01:09:03 Determining normalization factors
## 2020-06-20 01:09:03 Normalization and log-transformation.
## 2020-06-20 01:09:03 Build CYT object succeed
Trajectory
# Cluster cells by SOM algorithm
# Set random seed to make results reproducible
set.seed(5)
cyt <- runCluster(cyt, cluster.method = "som", xdim = 6, ydim = 5, verbose = T)## 2020-06-20 01:09:10 Calculating FlowSOM.
## 2020-06-20 01:09:10 Calculating FlowSOM completed.
# Do not perform downsampling
set.seed(1)
cyt <- processingCluster(cyt, downsampling.size = 1, force.resample = F)
# run Principal Component Analysis (PCA)
cyt <- runFastPCA(cyt, verbose = T)## 2020-06-20 01:09:10 Calculating PCA.
## 2020-06-20 01:09:11 Calculating PCA completed.
# run t-Distributed Stochastic Neighbor Embedding (tSNE)
set.seed(1)
cyt <- runTSNE(cyt, verbose = T)## 2020-06-20 01:09:11 Calculating tSNE.
## 2020-06-20 01:10:30 Calculating tSNE completed.
## 2020-06-20 01:10:30 Calculating Diffusion Map.
## 2020-06-20 01:10:30 Destiny determined an optimal global sigma: 0.865
## 2020-06-20 01:11:13 Calculating Diffusion Map completed
## 2020-06-20 01:11:13 Calculating Umap.
## 2020-06-20 01:13:21 Calculating Umap.
# build minimum spanning tree based on tsne
cyt <- buildTree(cyt, dim.type = "umap", dim.use = 1:2, verbose = T)## 2020-06-20 01:13:21 Calculating buildTree.
## 2020-06-20 01:13:21 Initialization for root.cells and leaf cells
## 2020-06-20 01:13:21 Calculating buildTree completed.
cyt@meta.data$branch.id <- factor(cyt@meta.data$branch.id, labels = LETTERS[1:max(cyt@meta.data$branch.id)])
###########################################
# This is visualization module
###########################################
color.exp <- c("#1B3361","#76C7FF","#EEEEEE","#FF987A","#6A0D28")
color.heat <- c("#1B3361","#1B3361","#76C7FF","#FFFFFF","#FF987A","#6A0D28","#6A0D28")
color.sam <- c("#50C1FF","#26418A","#398600","#ffd232","#ff7b4f","#700000")
color.rsam <- c("#50C1FF","#700000","#26418A","#398600","#ffd232","#ff7b4f")
color.time <- c("#F4D31D", "#FF3222","#7A06A0")
marker.plot <- "CD49f"
plot2D(cyt, item.use = c("UMAP_1", "UMAP_2"), color.by = marker.plot,
main = marker.plot, category = "numeric") +
scale_colour_gradientn(colors = color.exp)plot2D(cyt, item.use = c("UMAP_1", "UMAP_2"), color.by = "stage", main = "stage") +
scale_color_manual(values = color.sam)plotTree(cyt, color.by = marker.plot, show.node.name = F, cex.size = 1.5) +
scale_colour_gradientn(colors = color.exp)Pseudotime
###########################################
# Pseudotime
###########################################
cyt <- defRootCells(cyt, root.cells = c(11), verbose = T)## 2020-06-20 01:13:29 954 cells will be added to root.cells .
## 2020-06-20 01:13:29 Calculating Pseudotime.
## 2020-06-20 01:13:29 Pseudotime exists in meta.data, it will be replaced.
## 2020-06-20 01:13:29 The log data will be used to calculate pseudotime
## 2020-06-20 01:14:06 Calculating Pseudotime completed.
plot2D(cyt, item.use = c("UMAP_1", "UMAP_2"), category = "numeric",
size = 1, color.by = "pseudotime") +
scale_colour_gradientn(colors = color.time)# Tree plot
plotTree(cyt, color.by = "pseudotime", cex.size = 1.5) +
scale_colour_gradientn(colors = color.time)# denisty plot by different stage
plotPseudotimeDensity(cyt, adjust = 1) +
scale_color_manual(values = color.sam)##################
library(pheatmap)
exp.data <- fetchPlotMeta(cyt, markers = markers)
exp.data.plot <- exp.data[order(exp.data$pseudotime), ]
annotation_col = data.frame(Stage = exp.data.plot$stage)
rownames(annotation_col) <- exp.data.plot$cell
acolor <- color.sam
names(acolor) <- day.list
ann_colors <- list(Stage = acolor)
pheatmap(t(exp.data.plot[, markers]), cluster_rows = T, cluster_cols = F,
clustering_method = "ward.D", color = colorRampPalette(color.heat)(100),
annotation_col = annotation_col, annotation_colors = ann_colors,
scale = "row", fontsize_col = 0.01)pseudotime <- cyt@meta.data$pseudotime
pseudotime <- pseudotime[order(pseudotime)]
plot(pseudotime, ylab = "Pseudotime", pch = 16, cex = 1)Intermediate state analysis
###########################################
# Subset CYT
###########################################
cell.inter <- fetchCell(cyt, cluster.id = c(6,27,2,17,22,29,21,15,1,28,23))
cell.inter <- cell.inter[grep("D6|D8|D10", cell.inter)]
sub.cyt <- subsetCYT(cyt, cells = cell.inter)
set.seed(1)
sub.cyt <- runCluster(sub.cyt, cluster.method = "som", xdim = 3, ydim = 3, verbose = T)## 2020-06-20 01:14:35 Calculating FlowSOM.
## 2020-06-20 01:14:35 Calculating FlowSOM completed.
# Do not perform downsampling
set.seed(1)
sub.cyt <- processingCluster(sub.cyt, perplexity = 2, downsampling.size = 1, force.resample = F)
# run Principal Component Analysis (PCA)
sub.cyt <- runFastPCA(sub.cyt, verbose = T)## 2020-06-20 01:14:35 Calculating PCA.
## 2020-06-20 01:14:35 Calculating PCA completed.
## 2020-06-20 01:14:35 Calculating Diffusion Map.
## 2020-06-20 01:14:36 Destiny determined an optimal global sigma: 0.816
## 2020-06-20 01:14:39 Calculating Diffusion Map completed
# build minimum spanning tree based on tsne
sub.cyt <- buildTree(sub.cyt, dim.type = "raw", verbose = T)## 2020-06-20 01:14:39 Calculating buildTree.
## 2020-06-20 01:14:39 The log data will be used to calculate trajectory
## 2020-06-20 01:14:39 Initialization for root.cells and leaf cells
## 2020-06-20 01:14:39 Calculating buildTree completed.
## 2020-06-20 01:14:39 654 cells will be added to root.cells .
## 2020-06-20 01:14:39 Calculating Pseudotime.
## 2020-06-20 01:14:39 Pseudotime exists in meta.data, it will be replaced.
## 2020-06-20 01:14:39 The log data will be used to calculate pseudotime
## 2020-06-20 01:14:41 Calculating Pseudotime completed.
plot3D(sub.cyt, item.use = c("DC_2","DC_1","DC_3"),
size = 0.5, color.by = "stage", main = "Stage",
angle = 45, scale.y = 0.7,
color.theme = color.sam[4:6])plot3D(sub.cyt, item.use = c("DC_2","DC_1","DC_3"),
size = 0.5, color.by = marker.plot, main = marker.plot,
angle = 45, scale.y = 0.7, category = "numeric",
color.theme = c(color.heat))plot3D(sub.cyt, item.use = c("DC_2","DC_1","DC_3"),
size = 0.5, color.by = "pseudotime", main = "pseudotime",
angle = 45, scale.y = 0.7, category = "numeric",
color.theme = colorRampPalette(color.time)(500))Bug Reports
If there is any error in installing or librarying the CytoTree package, please contact us via e-mail forlynna@sjtu.edu.cn
Session information
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pheatmap_1.0.12 stringr_1.4.0 CytoTree_0.99.6 igraph_1.2.5
## [5] flowViz_1.52.0 lattice_0.20-41 flowCore_2.0.1 LSD_4.0-0
## [9] ggplot2_3.3.1
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.16 RUnit_0.4.32
## [3] tidyselect_1.1.0 RSQLite_2.2.0
## [5] AnnotationDbi_1.50.0 grid_4.0.0
## [7] ranger_0.12.1 BiocParallel_1.22.0
## [9] Rtsne_0.15 scatterpie_0.1.4
## [11] munsell_0.5.0 destiny_3.2.0
## [13] codetools_0.2-16 umap_0.2.5.0
## [15] withr_2.2.0 colorspace_1.4-1
## [17] Biobase_2.48.0 knitr_1.28
## [19] stats4_4.0.0 SingleCellExperiment_1.10.1
## [21] robustbase_0.93-6 vcd_1.4-7
## [23] VIM_6.0.0 TTR_0.23-6
## [25] labeling_0.3 GenomeInfoDbData_1.2.3
## [27] polyclip_1.10-0 bit64_0.9-7
## [29] farver_2.0.3 flowWorkspace_4.0.6
## [31] vctrs_0.3.1 generics_0.0.2
## [33] xfun_0.14 ggthemes_4.2.0
## [35] R6_2.4.1 GenomeInfoDb_1.24.0
## [37] RcppEigen_0.3.3.7.0 rmdformats_0.3.7
## [39] locfit_1.5-9.4 bitops_1.0-6
## [41] DelayedArray_0.14.0 scales_1.1.1
## [43] nnet_7.3-14 gtable_0.3.0
## [45] sva_3.36.0 RProtoBufLib_2.0.0
## [47] rlang_0.4.6 genefilter_1.70.0
## [49] scatterplot3d_0.3-41 flowUtils_1.52.0
## [51] splines_4.0.0 hexbin_1.28.1
## [53] BiocManager_1.30.10 yaml_2.2.1
## [55] abind_1.4-5 IDPmisc_1.1.20
## [57] RBGL_1.64.0 tools_4.0.0
## [59] bookdown_0.19 ellipsis_0.3.1
## [61] RColorBrewer_1.1-2 proxy_0.4-24
## [63] BiocGenerics_0.34.0 Rcpp_1.0.4.6
## [65] plyr_1.8.6 base64enc_0.1-3
## [67] zlibbioc_1.34.0 purrr_0.3.4
## [69] RCurl_1.98-1.2 FlowSOM_1.20.0
## [71] openssl_1.4.1 S4Vectors_0.26.1
## [73] zoo_1.8-8 SummarizedExperiment_1.18.1
## [75] haven_2.3.1 cluster_2.1.0
## [77] magrittr_1.5 ncdfFlow_2.34.0
## [79] data.table_1.12.8 RSpectra_0.16-0
## [81] openxlsx_4.1.5 gmodels_2.18.1
## [83] lmtest_0.9-37 RANN_2.6.1
## [85] pcaMethods_1.80.0 matrixStats_0.56.0
## [87] hms_0.5.3 evaluate_0.14
## [89] xtable_1.8-4 smoother_1.1
## [91] XML_3.99-0.3 rio_0.5.16
## [93] jpeg_0.1-8.1 mclust_5.4.6
## [95] readxl_1.3.1 IRanges_2.22.2
## [97] gridExtra_2.3 ggcyto_1.16.0
## [99] compiler_4.0.0 tibble_3.0.1
## [101] KernSmooth_2.23-17 crayon_1.3.4
## [103] htmltools_0.4.0 mgcv_1.8-31
## [105] corpcor_1.6.9 tidyr_1.1.0
## [107] RcppParallel_5.0.1 DBI_1.1.0
## [109] tweenr_1.0.1 MASS_7.3-51.6
## [111] boot_1.3-25 Matrix_1.2-18
## [113] car_3.0-8 gdata_2.18.0
## [115] parallel_4.0.0 GenomicRanges_1.40.0
## [117] forcats_0.5.0 pkgconfig_2.0.3
## [119] rvcheck_0.1.8 prettydoc_0.3.1
## [121] foreign_0.8-80 laeken_0.5.1
## [123] sp_1.4-2 xml2_1.3.2
## [125] annotate_1.66.0 XVector_0.28.0
## [127] digest_0.6.25 tsne_0.1-3
## [129] ConsensusClusterPlus_1.52.0 graph_1.66.0
## [131] rmarkdown_2.2 cellranger_1.1.0
## [133] edgeR_3.30.3 curl_4.3
## [135] gtools_3.8.2 ggplot.multistats_1.0.0
## [137] nlme_3.1-148 lifecycle_0.2.0
## [139] jsonlite_1.6.1 carData_3.0-4
## [141] BiocNeighbors_1.6.0 askpass_1.1
## [143] limma_3.44.3 pillar_1.4.4
## [145] DEoptimR_1.0-8 survival_3.2-3
## [147] glue_1.4.1 xts_0.12-0
## [149] zip_2.0.4 png_0.1-7
## [151] bit_1.1-15.2 Rgraphviz_2.32.0
## [153] ggforce_0.3.1 class_7.3-17
## [155] stringi_1.4.6 blob_1.2.1
## [157] RcppHNSW_0.2.0 CytoML_2.0.5
## [159] latticeExtra_0.6-29 memoise_1.1.0
## [161] dplyr_1.0.0 cytolib_2.0.3
## [163] knn.covertree_1.0 irlba_2.3.3
## [165] e1071_1.7-3
Version
0.99.6